library(tidyverse)
library(car)

## note: the folder also includes a survey of veterans only;
## we do not use the veterans sample here,
## only the public sample

## set working directory to 
## Dataverse replication folder

# Read the public-sample dataset. The path is relative to the working
# directory (the Dataverse replication folder); file.path() builds the same
# "/"-separated path as before while keeping lines short.
# NOTE(review): read.table() runs with its default whitespace separator;
# confirm the .tab file has no empty or space-containing fields.
survey <- read.table(
  file.path(
    "Harris_Data",
    "Harris 1979 Vietnam War Veterans Survey, study no. 792801",
    "harris_s792801_public_spss.tab"
  ),
  header = TRUE
)

# pid: sequential row identifier. seq_len() is used instead of 1:nrow() so an
# empty table yields integer(0) rather than c(1, 0).
survey$pid <- seq_len(nrow(survey))

# study: study number stored as a character string
survey$study <- "792801"

# study year (year)
survey$year <- 1979

# geographic data (urban): set to missing in this script
survey$urban <- NA

# geographic data (region): set to missing in this script
survey$region <- NA

# respondent head of household (hh): F6A codes turned into text labels
survey$hh <- survey$F6A %>%
  car::recode("c(1, 3) = 'Yes'; 2 = 'No'; 4 = 'Not sure'; 5 = 'No answer/refused'") %>%
  as.character()
table(survey$hh)

# increasing inequality (inequality): Q19_B codes turned into text labels
survey$inequality <- survey$Q19_B %>%
  car::recode("1 = 'Feel'; 2 = 'Don t feel'; 3 = 'Not sure'; 4 = 'No answer/refused'") %>%
  as.character()
table(survey$inequality)

# inequality variable (inequality.variable): constant 1 for every row
survey$inequality.variable <- 1

# union (union.self): F3A codes turned into text labels
survey$union.self <- survey$F3A %>%
  car::recode("1 = 'Yes'; 2 = 'No'; 3 = 'Not sure'; 4 = 'No answer/refused'") %>%
  as.character()
table(survey$union.self)

# union (union.other): F3B codes turned into text labels; anything other than
# 1-3 falls into the `else` category
survey$union.other <- survey$F3B %>%
  car::recode("1 = 'Yes'; 2 = 'No'; 3 = 'Not sure'; else = 'No answer/refused'") %>%
  as.character()
table(survey$union.other)

# Map employment-status codes (1-12) to their text labels.
#
# The same mapping is applied to F7A (`employed`) and F5A (`employed.self`),
# so it is defined once here instead of being duplicated.
#
# codes: vector of employment-status codes.
# Returns: the recoded vector produced by dplyr::recode(); codes outside 1-12
#   are handled by recode()'s default behaviour (no .default is supplied).
#
# The label for code 3 is assembled with paste() so that it is a single-line
# string. It was previously a multi-line string literal, which embedded a
# newline and the source file's indentation inside the data value.
# NOTE(review): label 1 is kept as written, including its unbalanced "(" --
# confirm against the codebook before changing it.
label_employment_status <- function(codes) {
  dplyr::recode(
    codes,
    `1` = "Working full time (35 hours a week or more",
    `2` = "Working part time (less than 35 hours a week)",
    `3` = paste(
      "Holding a job, but not at work because of temporary illness,",
      "on sick leave, vacation, labor dispute, on strike,",
      "bad weather, temporary lay-off, etc"
    ),
    `4` = "Unemployed",
    `5` = "Retired",
    `6` = "Student",
    `7` = "Housewife/husband",
    `8` = "Disabled, too ill to work",
    `9` = "On duty in armed services",
    `10` = "Other (SPECIFY)",
    `11` = "Not sure",
    `12` = "No answer/refused"
  )
}

# employment (employed)
survey$employed <- label_employment_status(survey$F7A)
table(survey$employed)

## empl self
survey$employed.self <- label_employment_status(survey$F5A)
# table() rather than summary(): summary() of a character column only reports
# length/class/mode, while table() shows the category counts like the other
# checks in this script.
table(survey$employed.self)

# occupation: set to missing; the candidate source column could not be found
# survey$occupation <- survey$F7B  ## cannot find this variable
survey[["occupation"]] <- NA

## occ self: also set to missing
survey[["occupation.self"]] <- NA

# household size (hhsize): F8A values stored as text
survey[["hhsize"]] <- survey$F8A %>% as.character()
table(survey$hhsize)

# education (educ): F4 codes turned into text labels.
# The recode specification is assembled from one term per line and joined with
# "; " so that no newlines or source indentation end up inside the spec string.
# The result is wrapped in as.character() for consistency with the other
# car::recode() columns in this script (hh, inequality, union.*).
# NOTE(review): the `else` branch assigns the text 'NA', not a missing value,
# so is.na() will not flag those rows. Kept as in the original -- confirm this
# is intended.
table(survey$F4)
survey$educ <- as.character(car::recode(
  survey$F4,
  paste(
    "1 = 'No formal schooling'",
    "2 = '1-7 years completed'",
    "3 = '8 years completed'",
    "4 = 'Some high school'",
    "5 = 'High school graduate'",
    "6 = 'Some college'",
    "7 = 'Two year college graduate'",
    "8 = 'Four year college graduate'",
    "9 = 'Post graduate'",
    "10 = 'Trade/technical/vocational after high school'",
    "11 = 'Not sure'",
    "else = 'NA'",
    sep = "; "
  )
))
table(survey$educ)

# household income (income): F9 bracket codes turned into dollar-range labels
survey$income <- survey$F9 %>%
  dplyr::recode(
    `1` = "Less than $2,000",
    `2` = "$2,000-$2,999",
    `3` = "$3,000-$4,999",
    `4` = "$5,000-$7,499",
    `5` = "$7,500-$9,999",
    `6` = "$10,000-$14,999",
    `7` = "$15,000-$19,999",
    `8` = "$20,000-$24,999",
    `9` = "$25,000-$34,999",
    `10` = "$35,000-$49,999",
    `11` = "$50,000 or more",
    `12` = "Not sure",
    `13` = "No answer/refused"
  )
table(survey$income)

# age: F2 bracket codes turned into age-range labels
survey$age <- survey$F2 %>%
  dplyr::recode(
    `1` = "18-24",
    `2` = "25-29",
    `3` = "30-34",
    `4` = "35-39",
    `5` = "40-44",
    `6` = "45-49",
    `7` = "50-54",
    `8` = "55-59",
    `9` = "60-64",
    `10` = "65-69",
    `11` = "70 and over",
    `12` = "Refused"
  )
table(survey$age)

# race: F10 codes turned into text labels.
# The label for code 3 is assembled with paste() so that it is a single-line
# string. It was previously a multi-line string literal, which embedded a
# newline and the source file's indentation inside the data value.
# NOTE(review): the remaining label text is kept exactly as written, including
# the missing ")" in label 3 and the spelling "Amerian" in label 5 -- confirm
# against the codebook and any downstream matching before correcting them.
survey$race <- dplyr::recode(
  survey$F10,
  `1` = "White, but not Hispanic",
  `2` = "Black, but not Hispanic",
  `3` = paste(
    "Spanish-American (Mexican, Cuban,",
    "Puerto Rican, Central or South American"
  ),
  `4` = "Asian (Oriental) or Pacific Islander",
  `5` = "Amerian Indian or Alaskan native",
  `6` = "Refused"
)
table(survey$race)

# politics (party): set to missing in this script
survey$party <- NA

# politics (ideology): set to missing in this script
survey$ideology <- NA

# gender: F11 codes turned into text labels
survey$gender <- dplyr::recode(
  survey$F11,
  `1` = "Male",
  `2` = "Female"
)
table(survey$gender)

# religion: set to missing in this script
# survey$religion <- survey$F8A
survey$religion <- NA

# factuals: set to missing in this script
survey$factual1 <- NA
survey$factual2 <- NA
survey$factual3 <- NA

## alienation index
# dontcare uses the same label set as `inequality` (Q19_B); it is wrapped in
# as.character() so both Q19 columns are built the same way.
survey$dontcare <- as.character(car::recode(
  survey$Q19_A,
  "1 = 'Feel'; 2 = 'Don t feel'; 3 = 'Not sure'; 4 = 'No answer/refused'"
))
survey$dontcount <- NA
survey$leftout <- NA

## question place
survey$question_place <- "no party question"

# subset: keep only the columns created above, in this order
survey_pub <- survey[, c(
  "pid", "study", "year", "urban", "region", "hh",
  "inequality", "inequality.variable", "union.self", "union.other",
  "employed", "employed.self", "occupation", "occupation.self",
  "hhsize", "educ", "income",
  "age", "race", "party", "ideology", "gender", "religion",
  "factual1", "factual2", "factual3",
  "dontcare", "dontcount", "leftout",
  "question_place"
)]

# recode pid: renumber rows 1..n. seq_len() is used instead of 1:nrow() so an
# empty table yields integer(0) rather than c(1, 0).
survey_pub$pid <- seq_len(nrow(survey_pub))

## save file
#saveRDS(survey_pub, file = "Harris_Data/survey_792801.rds")
